Forecasting Victory: 2024 League of Legends Worlds Matches Predictions¶

Name(s): Jiahao Cheng

Website Link: https://cjhjw.github.io/EECS398-Final-Project/

In [245]:
import pandas as pd
import numpy as np

import plotly.express as px
pd.options.plotting.backend = 'plotly'

from lec_utils import * # Feel free to uncomment and use this. It'll make your plotly graphs look like ours in lecture!

Step 1: Introduction¶

In [246]:
# Lift pandas' display truncation so wide/long frames are shown in full.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Raw match export; the file is expected next to the notebook.
data = pd.read_csv('2024_LoL_esports_match_data_from_OraclesElixir.csv')

# Preview 12 rows: in the output these are the ten player rows plus the
# two `position == 'team'` rows that share the first gameid.
data.head(12)
Out[246]:
gameid datacompleteness url league year split playoffs date game patch participantid side position playername playerid teamname teamid champion ban1 ban2 ban3 ban4 ban5 pick1 pick2 pick3 pick4 pick5 gamelength result kills deaths assists teamkills teamdeaths doublekills triplekills quadrakills pentakills firstblood firstbloodkill firstbloodassist firstbloodvictim team kpm ckpm firstdragon dragons opp_dragons elementaldrakes opp_elementaldrakes infernals mountains clouds oceans chemtechs hextechs dragons (type unknown) elders opp_elders firstherald heralds opp_heralds void_grubs opp_void_grubs firstbaron barons opp_barons firsttower towers opp_towers firstmidtower firsttothreetowers turretplates opp_turretplates inhibitors opp_inhibitors damagetochampions dpm damageshare damagetakenperminute damagemitigatedperminute wardsplaced wpm wardskilled wcpm controlwardsbought visionscore vspm totalgold earnedgold earned gpm earnedgoldshare goldspent gspd gpr total cs minionkills monsterkills monsterkillsownjungle monsterkillsenemyjungle cspm goldat10 xpat10 csat10 opp_goldat10 opp_xpat10 opp_csat10 golddiffat10 xpdiffat10 csdiffat10 killsat10 assistsat10 deathsat10 opp_killsat10 opp_assistsat10 opp_deathsat10 goldat15 xpat15 csat15 opp_goldat15 opp_xpat15 opp_csat15 golddiffat15 xpdiffat15 csdiffat15 killsat15 assistsat15 deathsat15 opp_killsat15 opp_assistsat15 opp_deathsat15 goldat20 xpat20 csat20 opp_goldat20 opp_xpat20 opp_csat20 golddiffat20 xpdiffat20 csdiffat20 killsat20 assistsat20 deathsat20 opp_killsat20 opp_assistsat20 opp_deathsat20 goldat25 xpat25 csat25 opp_goldat25 opp_xpat25 opp_csat25 golddiffat25 xpdiffat25 csdiffat25 killsat25 assistsat25 deathsat25 opp_killsat25 opp_assistsat25 opp_deathsat25
0 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 1 Blue top Zika oe:player:65ed20b21e2993fb00dbd21a2fd991b LNG Esports oe:team:a9145b7711873f53e610fbba0493484 Aatrox Akali Nocturne K'Sante Lee Sin Wukong NaN NaN NaN NaN NaN 1886 0 1 3 1 3 16 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.10 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 7092 225.62 0.17 564.15 NaN 14 0.45 4 0.13 5 24 0.76 11083 6960 221.42 0.24 10784 NaN NaN 279.0 256.0 23 16.0 0.0 8.88 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 2 Blue jng Weiwei oe:player:57da8dfcfbdb4e5b019fe93003db1c4 LNG Esports oe:team:a9145b7711873f53e610fbba0493484 Maokai Akali Nocturne K'Sante Lee Sin Wukong NaN NaN NaN NaN NaN 1886 0 0 4 3 3 16 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.10 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 7361 234.18 0.18 847.48 NaN 10 0.32 12 0.38 10 39 1.24 8636 4513 143.57 0.15 8840 NaN NaN 153.0 14.0 139 111.0 3.0 4.87 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 3 Blue mid Scout oe:player:71e79ef80600d398d90cfebe3b0b758 LNG Esports oe:team:a9145b7711873f53e610fbba0493484 Orianna Akali Nocturne K'Sante Lee Sin Wukong NaN NaN NaN NaN NaN 1886 0 0 2 0 3 16 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.10 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 10005 318.29 0.24 432.22 NaN 4 0.13 8 0.25 2 31 0.99 10743 6620 210.60 0.23 10594 NaN NaN 270.0 269.0 1 1.0 0.0 8.59 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 4 Blue bot GALA oe:player:867e8957fae1cb59f0808dbcc3aada2 LNG Esports oe:team:a9145b7711873f53e610fbba0493484 Kalista Akali Nocturne K'Sante Lee Sin Wukong NaN NaN NaN NaN NaN 1886 0 2 4 0 3 16 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.10 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 10892 346.51 0.26 491.45 NaN 22 0.70 13 0.41 4 44 1.40 12224 8101 257.72 0.28 11119 NaN NaN 311.0 307.0 4 0.0 0.0 9.89 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 5 Blue sup Mark oe:player:a74c2977c1fc826e9e7bdb6b224a141 LNG Esports oe:team:a9145b7711873f53e610fbba0493484 Senna Akali Nocturne K'Sante Lee Sin Wukong NaN NaN NaN NaN NaN 1886 0 0 3 3 3 16 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.10 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6451 205.23 0.15 239.71 NaN 47 1.50 22 0.70 12 111 3.53 7221 3098 98.56 0.11 6175 NaN NaN 30.0 30.0 0 0.0 0.0 0.95 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
5 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 6 Red top Xiaoxu oe:player:8007ba25dee37ac1e4445a6e9f8d252 Rare Atom oe:team:8516ca63facc91286d6c00212ca945e Rumble Poppy Ashe Neeko Vi Jarvan IV NaN NaN NaN NaN NaN 1886 1 4 0 6 16 3 NaN NaN NaN NaN NaN 1.0 NaN NaN 0.51 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 14049 446.95 0.25 228.90 NaN 11 0.35 3 0.10 5 26 0.83 13378 9255 294.43 0.23 11179 NaN NaN 283.0 245.0 38 15.0 6.0 9.00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
6 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 7 Red jng naiyou oe:player:a7b51467f09577883d7150f37393964 Rare Atom oe:team:8516ca63facc91286d6c00212ca945e Rell Poppy Ashe Neeko Vi Jarvan IV NaN NaN NaN NaN NaN 1886 1 1 0 12 16 3 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.51 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 3945 125.50 0.07 591.03 NaN 14 0.45 8 0.25 11 49 1.56 10590 6467 205.74 0.16 9455 NaN NaN 169.0 12.0 157 91.0 21.0 5.38 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 8 Red mid VicLa oe:player:d709285b163a94af9d819e568c592ba Rare Atom oe:team:8516ca63facc91286d6c00212ca945e LeBlanc Poppy Ashe Neeko Vi Jarvan IV NaN NaN NaN NaN NaN 1886 1 4 0 7 16 3 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.51 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 14917 474.56 0.26 444.11 NaN 10 0.32 14 0.45 8 44 1.40 14603 10480 333.40 0.25 12643 NaN NaN 329.0 319.0 10 7.0 2.0 10.47 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
8 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 9 Red bot Assum oe:player:da3299cd46f1ad8a86bbadeb6b8a320 Rare Atom oe:team:8516ca63facc91286d6c00212ca945e Varus Poppy Ashe Neeko Vi Jarvan IV NaN NaN NaN NaN NaN 1886 1 7 1 5 16 3 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.51 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 19516 620.87 0.34 268.57 NaN 11 0.35 14 0.45 7 41 1.30 15021 10898 346.70 0.27 13820 NaN NaN 303.0 294.0 9 9.0 0.0 9.64 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
9 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 10 Red sup Zorah oe:player:937dc5479c6416d1ad4997538144f6e Rare Atom oe:team:8516ca63facc91286d6c00212ca945e Renata Glasc Poppy Ashe Neeko Vi Jarvan IV NaN NaN NaN NaN NaN 1886 1 0 2 13 16 3 NaN NaN NaN NaN NaN 0.0 NaN NaN 0.51 0.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 4516 143.67 0.08 384.43 NaN 76 2.42 10 0.32 25 117 3.72 8145 4022 127.95 0.10 7110 NaN NaN 16.0 16.0 0 0.0 0.0 0.51 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 100 Blue team NaN NaN LNG Esports oe:team:a9145b7711873f53e610fbba0493484 NaN Akali Nocturne K'Sante Lee Sin Wukong Kalista Senna Orianna Maokai Aatrox 1886 0 3 16 7 3 16 NaN NaN NaN NaN 0.0 NaN NaN NaN 0.10 0.6 NaN 2.0 3.0 NaN NaN NaN NaN NaN NaN NaN NaN 2.0 NaN NaN NaN NaN NaN NaN NaN NaN 0.0 2.0 NaN 2.0 9.0 NaN NaN NaN NaN 0.0 1.0 41801 1329.83 NaN 2574.97 NaN 97 3.09 59 1.88 33 250 7.95 49907 29292 931.88 NaN 47512 -0.13 NaN NaN NaN 167 127.0 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 200 Red team NaN NaN Rare Atom oe:team:8516ca63facc91286d6c00212ca945e NaN Poppy Ashe Neeko Vi Jarvan IV Renata Glasc Varus LeBlanc Rell Rumble 1886 1 16 3 43 16 3 NaN NaN NaN NaN 1.0 NaN NaN NaN 0.51 0.6 NaN 3.0 2.0 NaN NaN NaN NaN NaN NaN NaN NaN 3.0 NaN NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 NaN 9.0 2.0 NaN NaN NaN NaN 1.0 0.0 56942 1811.52 NaN 1917.04 NaN 122 3.88 49 1.56 56 277 8.81 61737 41122 1308.23 NaN 54207 0.13 NaN NaN NaN 213 121.0 29.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [247]:
# (rows, columns) of the raw frame as loaded from the CSV.
data.shape
Out[247]:
(117576, 161)
In [248]:
# Positional rows 10 and 11; in the output these are the two
# `position == 'team'` summary rows (Blue and Red) of the first game.
data.iloc[10:12]
Out[248]:
gameid datacompleteness url league year split playoffs date game patch participantid side position playername playerid teamname teamid champion ban1 ban2 ban3 ban4 ban5 pick1 pick2 pick3 pick4 pick5 gamelength result kills deaths assists teamkills teamdeaths doublekills triplekills quadrakills pentakills firstblood firstbloodkill firstbloodassist firstbloodvictim team kpm ckpm firstdragon dragons opp_dragons elementaldrakes opp_elementaldrakes infernals mountains clouds oceans chemtechs hextechs dragons (type unknown) elders opp_elders firstherald heralds opp_heralds void_grubs opp_void_grubs firstbaron barons opp_barons firsttower towers opp_towers firstmidtower firsttothreetowers turretplates opp_turretplates inhibitors opp_inhibitors damagetochampions dpm damageshare damagetakenperminute damagemitigatedperminute wardsplaced wpm wardskilled wcpm controlwardsbought visionscore vspm totalgold earnedgold earned gpm earnedgoldshare goldspent gspd gpr total cs minionkills monsterkills monsterkillsownjungle monsterkillsenemyjungle cspm goldat10 xpat10 csat10 opp_goldat10 opp_xpat10 opp_csat10 golddiffat10 xpdiffat10 csdiffat10 killsat10 assistsat10 deathsat10 opp_killsat10 opp_assistsat10 opp_deathsat10 goldat15 xpat15 csat15 opp_goldat15 opp_xpat15 opp_csat15 golddiffat15 xpdiffat15 csdiffat15 killsat15 assistsat15 deathsat15 opp_killsat15 opp_assistsat15 opp_deathsat15 goldat20 xpat20 csat20 opp_goldat20 opp_xpat20 opp_csat20 golddiffat20 xpdiffat20 csdiffat20 killsat20 assistsat20 deathsat20 opp_killsat20 opp_assistsat20 opp_deathsat20 goldat25 xpat25 csat25 opp_goldat25 opp_xpat25 opp_csat25 golddiffat25 xpdiffat25 csdiffat25 killsat25 assistsat25 deathsat25 opp_killsat25 opp_assistsat25 opp_deathsat25
10 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 100 Blue team NaN NaN LNG Esports oe:team:a9145b7711873f53e610fbba0493484 NaN Akali Nocturne K'Sante Lee Sin Wukong Kalista Senna Orianna Maokai Aatrox 1886 0 3 16 7 3 16 NaN NaN NaN NaN 0.0 NaN NaN NaN 0.10 0.6 NaN 2.0 3.0 NaN NaN NaN NaN NaN NaN NaN NaN 2.0 NaN NaN NaN NaN NaN NaN NaN NaN 0.0 2.0 NaN 2.0 9.0 NaN NaN NaN NaN 0.0 1.0 41801 1329.83 NaN 2574.97 NaN 97 3.09 59 1.88 33 250 7.95 49907 29292 931.88 NaN 47512 -0.13 NaN NaN NaN 167 127.0 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 10660-10660_game_1 partial https://lpl.qq.com/es/stats.shtml?bmid=10660 DCup 2023 NaN 0 2024-01-01 05:13:15 1 13.24 200 Red team NaN NaN Rare Atom oe:team:8516ca63facc91286d6c00212ca945e NaN Poppy Ashe Neeko Vi Jarvan IV Renata Glasc Varus LeBlanc Rell Rumble 1886 1 16 3 43 16 3 NaN NaN NaN NaN 1.0 NaN NaN NaN 0.51 0.6 NaN 3.0 2.0 NaN NaN NaN NaN NaN NaN NaN NaN 3.0 NaN NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 NaN 9.0 2.0 NaN NaN NaN NaN 1.0 0.0 56942 1811.52 NaN 1917.04 NaN 122 3.88 49 1.56 56 277 8.81 61737 41122 1308.23 NaN 54207 0.13 NaN NaN NaN 213 121.0 29.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

Step 2: Data Cleaning and Exploratory Data Analysis¶

2.1 Extract team data and target columns¶

In [249]:
# Columns kept for the analysis: outcome, side, "first objective" flags,
# game length, and gold / XP differences at 10, 15 and 20 minutes.
target_columns = [
    'result', 'side',
    'firstblood', 'firstdragon', 'firstbaron',
    'firsttower', 'firstmidtower', 'firsttothreetowers',
    'gamelength',
    'golddiffat10', 'golddiffat15', 'golddiffat20',
    'xpdiffat10', 'xpdiffat15', 'xpdiffat20',
]

# Keep only the per-team summary rows and renumber them from 0.
# reset_index(drop=True) discards the old index directly instead of
# materialising it as an 'index' column and dropping it again.
# NOTE: this rebinds `data`; re-running the cell without reloading the CSV
# raises KeyError because the 'position' column no longer exists.
is_team_row = data['position'] == 'team'
data = data.loc[is_team_row, target_columns].reset_index(drop=True)
data.head()
Out[249]:
result side firstblood firstdragon firstbaron firsttower firstmidtower firsttothreetowers gamelength golddiffat10 golddiffat15 golddiffat20 xpdiffat10 xpdiffat15 xpdiffat20
0 0 Blue 0.0 NaN NaN NaN NaN NaN 1886 NaN NaN NaN NaN NaN NaN
1 1 Red 1.0 NaN NaN NaN NaN NaN 1886 NaN NaN NaN NaN NaN NaN
2 0 Blue 0.0 NaN NaN NaN NaN NaN 1911 NaN NaN NaN NaN NaN NaN
3 1 Red 1.0 NaN NaN NaN NaN NaN 1911 NaN NaN NaN NaN NaN NaN
4 1 Blue 1.0 NaN NaN NaN NaN NaN 1324 NaN NaN NaN NaN NaN NaN
In [250]:
# Shape after keeping only the team rows and the selected columns.
data.shape
Out[250]:
(19596, 15)

2.2 Check and modify NaN¶

In [251]:
# Number of missing values in each column.
data.isna().sum()
Out[251]:
result                   0
side                     0
firstblood               0
firstdragon           2782
firstbaron            2782
firsttower            2782
firstmidtower         2784
firsttothreetowers    2782
gamelength               0
golddiffat10          2784
golddiffat15          2786
golddiffat20          2822
xpdiffat10            2784
xpdiffat15            2786
xpdiffat20            2822
dtype: int64
In [252]:
# A row is kept only if none of these tower / gold / XP columns is missing.
need_drop = [
    'firsttower', 'firstmidtower', 'firsttothreetowers',
    'golddiffat10', 'golddiffat15', 'golddiffat20',
    'xpdiffat10', 'xpdiffat15', 'xpdiffat20',
]
rows_complete = data[need_drop].notna().all(axis=1)
data = data.loc[rows_complete]

# Re-check the per-column missing counts after the filter.
data.isna().sum()
Out[252]:
result                0
side                  0
firstblood            0
firstdragon           0
firstbaron            0
firsttower            0
firstmidtower         0
firsttothreetowers    0
gamelength            0
golddiffat10          0
golddiffat15          0
golddiffat20          0
xpdiffat10            0
xpdiffat15            0
xpdiffat20            0
dtype: int64
In [253]:
# Shape after removing rows with missing tower / gold / XP values.
data.shape
Out[253]:
(16774, 15)

2.3 Categorize Gamelength¶

In [254]:
# Shortest game length in the cleaned data (plotted below as seconds).
data['gamelength'].min()
Out[254]:
1143
In [255]:
# Longest game length in the cleaned data (plotted below as seconds).
data['gamelength'].max()
Out[255]:
3482
In [256]:
# `data` holds one row per team, i.e. a Blue and a Red row for every game,
# both carrying the same `gamelength`. Plot a single side so that each game
# is counted once and the "Number of Games" axis is not doubled.
one_row_per_game = data.loc[data['side'] == 'Blue']

# Histogram of game durations with a box plot in the top margin.
fig = px.histogram(
    one_row_per_game,
    x='gamelength',
    nbins=150,
    title='Game Count by Game Duration (seconds)',
    marginal='box',
    color_discrete_sequence=['#AB63FA'],
    width=700,
    height=400
)
fig.update_layout(
    xaxis_title='Game Duration (seconds)',
    yaxis_title='Number of Games'
)

fig.show()
In [257]:
# Write the figure currently bound to `fig` to gamelength_hist.html.
fig.write_html("gamelength_hist.html")
In [258]:
# Duration bucket labels, ordered from shortest to longest game.
gametime = ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']

def group_time(time):
    """Map a game length in seconds to its duration bucket label.

    Buckets are contiguous half-open intervals, so every value lands in
    exactly one of them, including non-integer seconds:

        time < 1500          -> '<=25(mins)'
        1500 <= time < 1800  -> '25-30(mins)'
        1800 <= time < 2100  -> '30-35(mins)'
        2100 <= time < 2400  -> '35-40(mins)'
        otherwise            -> '>=40(mins)'

    Note that a game of exactly 25:00 (1500 s) is labelled '25-30(mins)'.

    Parameters
    ----------
    time : int or float
        Game length in seconds.

    Returns
    -------
    str
        One of the labels in `gametime`.
    """
    # Strict upper bounds leave no gap between buckets; with closed integer
    # ranges a value such as 1499.5 would fall through to the last bucket.
    if time < 1500:
        return gametime[0]
    if time < 1800:
        return gametime[1]
    if time < 2100:
        return gametime[2]
    if time < 2400:
        return gametime[3]
    return gametime[4]

# Label every row with its duration bucket, then discard the raw seconds.
time_labels = data['gamelength'].apply(group_time)
data = data.assign(time_label=time_labels).drop(columns='gamelength')
In [259]:
# Rows per duration bucket, most frequent first. These are team rows, so
# each game appears to contribute two of them (its Blue and its Red row).
data['time_label'].value_counts()
Out[259]:
time_label
30-35(mins)    5522
25-30(mins)    5348
35-40(mins)    2714
<=25(mins)     1786
>=40(mins)     1404
Name: count, dtype: int64
In [260]:
# Same counts, reordered to follow the bucket order defined in `gametime`.
data['time_label'].value_counts().reindex(gametime)
Out[260]:
time_label
<=25(mins)     1786
25-30(mins)    5348
30-35(mins)    5522
35-40(mins)    2714
>=40(mins)     1404
Name: count, dtype: int64
In [261]:
# `data` has a Blue and a Red row for every game, so counting all rows would
# report twice the number of games. Count one side only, then order the
# buckets as in `gametime` and flatten to columns time_label / count.
counts = (
    data.loc[data['side'] == 'Blue', 'time_label']
    .value_counts()
    .reindex(gametime)
    .reset_index()
)

fig = px.bar(
    counts,
    x='time_label',
    y='count',
    title='Game Count by Game Duration (minutes)',
    color_discrete_sequence=['#AB63FA'],
    width=700,
    height=400
)
fig.update_layout(
    xaxis_title='Game Duration (minutes)',
    yaxis_title='Number of Games'
)
fig.show()
In [262]:
# Write the figure currently bound to `fig` to gameduration_hist.html.
fig.write_html("gameduration_hist.html")

2.4 Recategorize result as win¶

In [263]:
# Replace the numeric `result` column (0/1 in the previews) with a boolean
# `win` column. The comparison is vectorized, so no per-row Python call
# is needed to produce the booleans.
data = data.assign(win=data['result'] == 1).drop(columns='result')
In [264]:
# Print the first rows as a Markdown table (plain text output).
# NOTE(review): `tabulate` is not called by name in this cell; pandas'
# to_markdown() relies on the tabulate package being installed — confirm
# whether this import is needed anywhere before removing it.
from tabulate import tabulate
print(data.head().to_markdown())
|    | side   |   firstblood |   firstdragon |   firstbaron |   firsttower |   firstmidtower |   firsttothreetowers |   golddiffat10 |   golddiffat15 |   golddiffat20 |   xpdiffat10 |   xpdiffat15 |   xpdiffat20 | time_label   | win   |
|---:|:-------|-------------:|--------------:|-------------:|-------------:|----------------:|---------------------:|---------------:|---------------:|---------------:|-------------:|-------------:|-------------:|:-------------|:------|
| 30 | Blue   |            0 |             1 |            1 |            1 |               1 |                    1 |           1364 |           2293 |           4248 |          557 |          949 |         2138 | <=25(mins)   | True  |
| 31 | Red    |            1 |             0 |            0 |            0 |               0 |                    0 |          -1364 |          -2293 |          -4248 |         -557 |         -949 |        -2138 | <=25(mins)   | False |
| 32 | Blue   |            0 |             0 |            0 |            0 |               0 |                    0 |            -88 |            -75 |            777 |          625 |         1092 |         2722 | 35-40(mins)  | True  |
| 33 | Red    |            1 |             1 |            1 |            1 |               1 |                    1 |             88 |             75 |           -777 |         -625 |        -1092 |        -2722 | 35-40(mins)  | False |
| 34 | Blue   |            0 |             1 |            1 |            0 |               0 |                    0 |          -2583 |           -561 |          -1528 |        -1718 |          410 |         -722 | 30-35(mins)  | True  |
In [265]:
# Shape after bucketing game length and converting `result` to `win`.
data.shape
Out[265]:
(16774, 15)

2.5 golddiffat10 and xpdiffat10 distributions¶

In [266]:
# Red-side rows only.
df = data.loc[data['side'] == 'Red']

# Distribution of the 10-minute gold difference, box plot in the margin.
fig = px.histogram(
    df,
    x='golddiffat10',
    nbins=150,
    title='Team Count by Gold Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#FF4040'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='Gold Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed markers at the 2.5th and 97.5th percentiles (central 95%).
lower, upper = df['golddiffat10'].quantile([0.025, 0.975])
vline_style = dict(
    line_dash='dash',
    line_color='red',
    line_width=2,
    annotation_font_color='black',
    annotation_bgcolor='white',
)
fig.add_vline(
    x=lower,
    annotation_text=f'2.5% ({lower:.0f})',
    annotation_position='top left',
    **vline_style,
)
fig.add_vline(
    x=upper,
    annotation_text=f'97.5% ({upper:.0f})',
    annotation_position='top right',
    **vline_style,
)

fig.show()
In [267]:
# Blue-side rows only.
df = data.loc[data['side'] == 'Blue']

# Distribution of the 10-minute gold difference, box plot in the margin.
fig = px.histogram(
    df,
    x='golddiffat10',
    nbins=150,
    title='Team Count by Gold Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#1E90FF'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='Gold Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed markers at the 2.5th and 97.5th percentiles (central 95%).
lower, upper = df['golddiffat10'].quantile([0.025, 0.975])
vline_style = dict(
    line_dash='dash',
    line_color='#1E90FF',
    line_width=2,
    annotation_font_color='black',
    annotation_bgcolor='white',
)
fig.add_vline(
    x=lower,
    annotation_text=f'2.5% ({lower:.0f})',
    annotation_position='top left',
    **vline_style,
)
fig.add_vline(
    x=upper,
    annotation_text=f'97.5% ({upper:.0f})',
    annotation_position='top right',
    **vline_style,
)

fig.show()
In [268]:
# Red-side rows only.
df = data.loc[data['side'] == 'Red']

# Distribution of the 10-minute XP difference, box plot in the margin.
fig = px.histogram(
    df,
    x='xpdiffat10',
    nbins=150,
    title='Team Count by XP Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#CE2029'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='XP Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed markers at the 2.5th and 97.5th percentiles (central 95%).
lower, upper = df['xpdiffat10'].quantile([0.025, 0.975])
vline_style = dict(
    line_dash='dash',
    line_color='#CE2029',
    line_width=2,
    annotation_font_color='black',
    annotation_bgcolor='white',
)
fig.add_vline(
    x=lower,
    annotation_text=f'2.5% ({lower:.0f})',
    annotation_position='top left',
    **vline_style,
)
fig.add_vline(
    x=upper,
    annotation_text=f'97.5% ({upper:.0f})',
    annotation_position='top right',
    **vline_style,
)

fig.show()
In [269]:
# Write the figure currently bound to `fig` to xp10_red.html.
fig.write_html("xp10_red.html")
In [270]:
# Blue-side rows only.
df = data.loc[data['side'] == 'Blue']

# Distribution of the 10-minute XP difference, box plot in the margin.
fig = px.histogram(
    df,
    x='xpdiffat10',
    nbins=150,
    title='Team Count by XP Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#4682B4'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='XP Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed markers at the 2.5th and 97.5th percentiles (central 95%).
lower, upper = df['xpdiffat10'].quantile([0.025, 0.975])
vline_style = dict(
    line_dash='dash',
    line_color='#4682B4',
    line_width=2,
    annotation_font_color='black',
    annotation_bgcolor='white',
)
fig.add_vline(
    x=lower,
    annotation_text=f'2.5% ({lower:.0f})',
    annotation_position='top left',
    **vline_style,
)
fig.add_vline(
    x=upper,
    annotation_text=f'97.5% ({upper:.0f})',
    annotation_position='top right',
    **vline_style,
)

fig.show()
In [271]:
# Write the figure currently bound to `fig` to xp10_blue.html.
fig.write_html("xp10_blue.html")

2.6 Explore first* indicators (first blood, dragon, baron, towers)¶

In [272]:
# The six "first objective" flag columns.
target_columns = [
    'firstblood', 'firstdragon', 'firstbaron',
    'firsttower', 'firstmidtower', 'firsttothreetowers',
]

# Turn the flags into booleans: True exactly where the stored value equals 1.
data[target_columns] = data[target_columns].eq(1)
In [273]:
# Mean of the boolean `win` column for every (side, first-blood flag) pair.
blood_win_rate = data.groupby(['side', 'firstblood'])['win'].mean().reset_index()

fig = px.bar(
    blood_win_rate,
    x='firstblood',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win Rate by Side and First Blood',
    width=700,
    height=400,
).update_layout(
    xaxis_title='First Blood',
    yaxis_title='Average Win Rate',
)

fig.show()
In [274]:
# Write the figure currently bound to `fig` to win_rate_blood.html.
fig.write_html("win_rate_blood.html")
In [275]:
# Mean of the boolean `win` column for every (side, first-dragon flag) pair.
dragon_win_rate = data.groupby(['side', 'firstdragon'])['win'].mean().reset_index()

fig = px.bar(
    dragon_win_rate,
    x='firstdragon',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win Rate by Side and First Dragon',
    width=700,
    height=400,
).update_layout(
    xaxis_title='First Dragon',
    yaxis_title='Average Win Rate',
)

fig.show()
In [276]:
# Write the figure currently bound to `fig` to win_rate_dragon.html.
fig.write_html("win_rate_dragon.html")
In [277]:
# Mean of the boolean `win` column for every (side, first-baron flag) pair.
baron_win_rate = data.groupby(['side', 'firstbaron'])['win'].mean().reset_index()

fig = px.bar(
    baron_win_rate,
    x='firstbaron',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win Rate by Side and First Baron',
    width=700,
    height=400,
).update_layout(
    xaxis_title='First Baron',
    yaxis_title='Average Win Rate',
)

fig.show()
In [278]:
# Mean of the boolean `win` column for every (side, first-tower flag) pair.
tower_win_rate = data.groupby(['side', 'firsttower'])['win'].mean().reset_index()

fig = px.bar(
    tower_win_rate,
    x='firsttower',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win Rate by Side and First Tower',
    width=700,
    height=400,
).update_layout(
    xaxis_title='First Tower',
    yaxis_title='Average Win Rate',
)

fig.show()
In [279]:
# Mean of the boolean `win` column for every (side, first-mid-tower flag) pair.
mid_tower_win_rate = data.groupby(['side', 'firstmidtower'])['win'].mean().reset_index()

fig = px.bar(
    mid_tower_win_rate,
    x='firstmidtower',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win Rate by Side and First Mid Tower',
    width=700,
    height=400,
).update_layout(
    xaxis_title='First Mid Tower',
    yaxis_title='Average Win Rate',
)

fig.show()
In [280]:
# Mean of the boolean `win` column for every
# (side, first-to-three-towers flag) pair.
three_towers_win_rate = (
    data.groupby(['side', 'firsttothreetowers'])['win'].mean().reset_index()
)

fig = px.bar(
    three_towers_win_rate,
    x='firsttothreetowers',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win Rate by Side and First Three Tower',
    width=700,
    height=400,
).update_layout(
    xaxis_title='First Three Tower',
    yaxis_title='Average Win Rate',
)

fig.show()
In [281]:
def _win_rate_by(flag_column):
    """Return the mean of `win` per (side, flag value) as a flat DataFrame."""
    grouped_wins = data.groupby(['side', flag_column])['win']
    return grouped_wins.mean().reset_index()


# One summary frame per "first objective" flag.
df1 = _win_rate_by('firsttothreetowers')
df2 = _win_rate_by('firstmidtower')
df3 = _win_rate_by('firsttower')
df4 = _win_rate_by('firstdragon')
df5 = _win_rate_by('firstbaron')
df6 = _win_rate_by('firstblood')
In [282]:
def _label_first_info(frame, flag_column, detail):
    """Rename the flag column to 'First Info Result' and tag the frame with
    a 'First Info Detail' column holding the objective's display name."""
    labelled = frame.rename(columns={flag_column: 'First Info Result'})
    labelled['First Info Detail'] = detail
    return labelled


# Give all six summaries the same column layout so they can be stacked.
df1 = _label_first_info(df1, 'firsttothreetowers', 'First to Three Towers')
df2 = _label_first_info(df2, 'firstmidtower', 'First Mid Tower')
df3 = _label_first_info(df3, 'firsttower', 'First Tower')
df4 = _label_first_info(df4, 'firstdragon', 'First Dragon')
df5 = _label_first_info(df5, 'firstbaron', 'First Baron')
df6 = _label_first_info(df6, 'firstblood', 'First Blood')
In [283]:
# Stack the six per-objective summaries and add a combined
# "<side> - <flag>" key (e.g. 'Blue - True') used to colour the bars.
first_info_frames = [df1, df2, df3, df4, df5, df6]
df_all = pd.concat(first_info_frames, ignore_index=True).assign(
    Side_First_Info=lambda frame: (
        frame['side'] + ' - ' + frame['First Info Result'].astype(str)
    )
)
df_all
Out[283]:
side First Info Result win First Info Detail Side_First_Info
0 Blue False 0.21 First to Three Towers Blue - False
1 Blue True 0.76 First to Three Towers Blue - True
2 Red False 0.24 First to Three Towers Red - False
3 Red True 0.79 First to Three Towers Red - True
4 Blue False 0.26 First Mid Tower Blue - False
5 Blue True 0.72 First Mid Tower Blue - True
6 Red False 0.28 First Mid Tower Red - False
7 Red True 0.74 First Mid Tower Red - True
8 Blue False 0.33 First Tower Blue - False
9 Blue True 0.69 First Tower Blue - True
10 Red False 0.31 First Tower Red - False
11 Red True 0.67 First Tower Red - True
12 Blue False 0.47 First Dragon Blue - False
13 Blue True 0.62 First Dragon Blue - True
14 Red False 0.38 First Dragon Red - False
15 Red True 0.53 First Dragon Red - True
16 Blue False 0.20 First Baron Blue - False
17 Blue True 0.85 First Baron Blue - True
18 Red False 0.17 First Baron Red - False
19 Red True 0.84 First Baron Red - True
20 Blue False 0.43 First Blood Blue - False
21 Blue True 0.62 First Blood Blue - True
22 Red False 0.38 First Blood Red - False
23 Red True 0.57 First Blood Red - True
In [284]:
# Bar colours keyed by the "<side> - <flag>" strings stored in
# df_all['Side_First_Info']: blue hues for Blue side, red hues for Red side,
# with the lighter shade for False and the darker shade for True.
color_map = {
    'Blue - False': '#4B8BBE',   
    'Blue - True': '#306998',   
    'Red - False': '#FF7F7F',  
    'Red - True': '#D62728'
}
In [285]:
# Bar order inside each group: both "False" bars first, then both "True" bars.
desired_order = ['Red - False', 'Blue - False', 'Red - True', 'Blue - True']

# Left-to-right order of the objective groups on the x-axis.
first_info_order = [
    'First Blood', 'First Dragon', 'First Tower',
    'First Mid Tower', 'First to Three Towers', 'First Baron',
]

# Grouped bar chart of win rate per objective, drawn through the pandas
# plotting backend.
fig = df_all.plot(
    kind='bar',
    x='First Info Detail',
    y='win',
    color='Side_First_Info',
    barmode='group',
    category_orders={
        'First Info Detail': first_info_order,
        'Side_First_Info': desired_order,
    },
    color_discrete_map=color_map,
    title='Win Rate by Side and Tower',
).update_layout(width=1000, height=400)
fig
In [286]:
# Write the figure currently bound to `fig` to win_rate_side_tower.html.
fig.write_html("win_rate_side_tower.html")

2.7 Explore side¶

In [287]:
# Flags, gold / XP differences and the outcome, averaged separately per side.
target_columns = [
    'firstblood', 'firstdragon', 'firstbaron',
    'firsttower', 'firstmidtower', 'firsttothreetowers',
    'golddiffat10', 'golddiffat15', 'golddiffat20',
    'xpdiffat10', 'xpdiffat15', 'xpdiffat20',
    'win',
]
rows_by_side = data.groupby('side')
df = rows_by_side[target_columns].mean()
df
Out[287]:
firstblood firstdragon firstbaron firsttower firstmidtower firsttothreetowers golddiffat10 golddiffat15 golddiffat20 xpdiffat10 xpdiffat15 xpdiffat20 win
side
Blue 0.52 0.38 0.50 0.55 0.57 0.57 144.92 331.16 523.68 66.9 94.46 95.87 0.53
Red 0.48 0.61 0.46 0.45 0.43 0.43 -144.92 -331.16 -523.68 -66.9 -94.46 -95.87 0.47
In [288]:
# DataFrame.to_markdown() depends on the optional `tabulate` package.
from tabulate import tabulate
# Print the per-side summary table as Markdown text (presumably for the project write-up).
print(df.to_markdown())
| side   |   firstblood |   firstdragon |   firstbaron |   firsttower |   firstmidtower |   firsttothreetowers |   golddiffat10 |   golddiffat15 |   golddiffat20 |   xpdiffat10 |   xpdiffat15 |   xpdiffat20 |      win |
|:-------|-------------:|--------------:|-------------:|-------------:|----------------:|---------------------:|---------------:|---------------:|---------------:|-------------:|-------------:|-------------:|---------:|
| Blue   |     0.516275 |      0.384643 |     0.501967 |     0.548706 |        0.572314 |             0.571837 |        144.923 |        331.158 |        523.683 |      66.8972 |      94.4559 |       95.871 | 0.527483 |
| Red    |     0.483725 |      0.61488  |     0.456421 |     0.451294 |        0.427686 |             0.428163 |       -144.923 |       -331.158 |       -523.683 |     -66.8972 |     -94.4559 |      -95.871 | 0.472517 |
In [289]:
# Mean win indicator per side for each game-length bucket; the columns are put
# in the order given by `gametime` (defined earlier in the notebook).
df = (
    data
    .pivot_table(
        index='side',
        columns='time_label',
        values='win',
        aggfunc='mean',
    )
    .reindex(columns=gametime)
)
df
Out[289]:
time_label <=25(mins) 25-30(mins) 30-35(mins) 35-40(mins) >=40(mins)
side
Blue 0.6 0.52 0.52 0.51 0.53
Red 0.4 0.48 0.48 0.49 0.47
In [290]:
# DataFrame.to_markdown() depends on the optional `tabulate` package.
from tabulate import tabulate
# Print the side-by-game-length win table as Markdown text (presumably for the project write-up).
print(df.to_markdown())
| side   |   <=25(mins) |   25-30(mins) |   30-35(mins) |   35-40(mins) |   >=40(mins) |
|:-------|-------------:|--------------:|--------------:|--------------:|-------------:|
| Blue   |     0.601344 |      0.522438 |      0.516117 |      0.511422 |      0.52849 |
| Red    |     0.398656 |      0.477562 |      0.483883 |      0.488578 |      0.47151 |

2.8 Explore Gold/XP Differences at 10/15/20 Minutes¶

In [291]:
# Histogram (with a box-plot margin) of the gold difference at 10 minutes, coloured by side.
data.plot(
    kind='hist',
    x='golddiffat10',
    color='side',
    color_discrete_map=dict(Blue='steelblue', Red='crimson'),
    nbins=50,
    marginal='box',
    title='Distribution of Gold Difference at 10 mins',
)
In [292]:
# Histogram (with a box-plot margin) of the gold difference at 15 minutes, coloured by side.
data.plot(
    kind='hist',
    x='golddiffat15',
    color='side',
    color_discrete_map=dict(Blue='steelblue', Red='crimson'),
    nbins=50,
    marginal='box',
    title='Distribution of Gold Difference at 15 mins',
)
In [293]:
# Histogram (with a box-plot margin) of the gold difference at 20 minutes, coloured by side.
data.plot(
    kind='hist',
    x='golddiffat20',
    color='side',
    color_discrete_map=dict(Blue='steelblue', Red='crimson'),
    nbins=50,
    marginal='box',
    title='Distribution of Gold Difference at 20 mins',
)
In [294]:
# Histogram (with a box-plot margin) of the gold difference at 10 minutes, split by outcome.
# plotly express looks colours up by the raw group value, so the palette is keyed
# by the boolean outcomes (these keys also match 0/1 integers). With only the
# string keys 'True'/'False', a boolean/numeric `win` column never matched and
# the default colours were used silently. The string keys are kept so a
# string-typed column still works.
data.plot(
    kind='hist',
    x='golddiffat10',
    color='win',
    color_discrete_map={
        True: 'olive', False: 'crimson',
        'True': 'olive', 'False': 'crimson',
    },
    nbins=50,
    marginal='box',
    title='Distribution of Gold Difference at 10 mins',
)
In [295]:
# Histogram (with a box-plot margin) of the gold difference at 15 minutes, split by outcome.
# plotly express looks colours up by the raw group value, so the palette is keyed
# by the boolean outcomes (these keys also match 0/1 integers). With only the
# string keys 'True'/'False', a boolean/numeric `win` column never matched and
# the default colours were used silently. The string keys are kept so a
# string-typed column still works.
data.plot(
    kind='hist',
    x='golddiffat15',
    color='win',
    color_discrete_map={
        True: 'olive', False: 'crimson',
        'True': 'olive', 'False': 'crimson',
    },
    nbins=50,
    marginal='box',
    title='Distribution of Gold Difference at 15 mins',
)
In [296]:
# Histogram (with a box-plot margin) of the gold difference at 20 minutes, split by outcome.
# plotly express looks colours up by the raw group value, so the palette is keyed
# by the boolean outcomes (these keys also match 0/1 integers). With only the
# string keys 'True'/'False', a boolean/numeric `win` column never matched and
# the default colours were used silently. The string keys are kept so a
# string-typed column still works.
fig = data.plot(
    kind='hist',
    x='golddiffat20',
    color='win',
    color_discrete_map={
        True: 'olive', False: 'crimson',
        'True': 'olive', 'False': 'crimson',
    },
    nbins=50,
    marginal='box',
    title='Distribution of Gold Difference at 20 mins',
)
# Fixed size for the exported figure.
fig.update_layout(width=800, height=400)
fig
In [297]:
# Export the 20-minute gold-difference histogram held in `fig` to an HTML file.
fig.write_html("gold_20_hist.html")
In [298]:
# Histogram (with a box-plot margin) of the XP difference at 10 minutes, coloured by side.
data.plot(
    kind='hist',
    x='xpdiffat10',
    color='side',
    color_discrete_map=dict(Blue='steelblue', Red='crimson'),
    nbins=50,
    marginal='box',
    title='Distribution of XP Difference at 10 mins',
)
In [299]:
# Histogram (with a box-plot margin) of the XP difference at 15 minutes, coloured by side.
data.plot(
    kind='hist',
    x='xpdiffat15',
    color='side',
    color_discrete_map=dict(Blue='steelblue', Red='crimson'),
    nbins=50,
    marginal='box',
    title='Distribution of XP Difference at 15 mins',
)
In [300]:
# Histogram (with a box-plot margin) of the XP difference at 20 minutes, coloured by side.
data.plot(
    kind='hist',
    x='xpdiffat20',
    color='side',
    color_discrete_map=dict(Blue='steelblue', Red='crimson'),
    nbins=50,
    marginal='box',
    title='Distribution of XP Difference at 20 mins',
)
In [301]:
# Histogram (with a box-plot margin) of the XP difference at 10 minutes, split by outcome.
# plotly express looks colours up by the raw group value, so the palette is keyed
# by the boolean outcomes (these keys also match 0/1 integers). With only the
# string keys 'True'/'False', a boolean/numeric `win` column never matched and
# the default colours were used silently. The string keys are kept so a
# string-typed column still works.
data.plot(
    kind='hist',
    x='xpdiffat10',
    color='win',
    color_discrete_map={
        True: 'olive', False: 'crimson',
        'True': 'olive', 'False': 'crimson',
    },
    nbins=50,
    marginal='box',
    title='Distribution of XP Difference at 10 mins',
)
In [302]:
# Histogram (with a box-plot margin) of the XP difference at 15 minutes, split by outcome.
# plotly express looks colours up by the raw group value, so the palette is keyed
# by the boolean outcomes (these keys also match 0/1 integers). With only the
# string keys 'True'/'False', a boolean/numeric `win` column never matched and
# the default colours were used silently. The string keys are kept so a
# string-typed column still works.
data.plot(
    kind='hist',
    x='xpdiffat15',
    color='win',
    color_discrete_map={
        True: 'olive', False: 'crimson',
        'True': 'olive', 'False': 'crimson',
    },
    nbins=50,
    marginal='box',
    title='Distribution of XP Difference at 15 mins',
)
In [303]:
# Histogram (with a box-plot margin) of the XP difference at 20 minutes, split by outcome.
# plotly express looks colours up by the raw group value, so the palette is keyed
# by the boolean outcomes (these keys also match 0/1 integers). With only the
# string keys 'True'/'False', a boolean/numeric `win` column never matched and
# the default colours were used silently. The string keys are kept so a
# string-typed column still works.
fig = data.plot(
    kind='hist',
    x='xpdiffat20',
    color='win',
    color_discrete_map={
        True: 'olive', False: 'crimson',
        'True': 'olive', 'False': 'crimson',
    },
    nbins=50,
    marginal='box',
    title='Distribution of XP Difference at 20 mins',
)
# Fixed size for the exported figure.
fig.update_layout(width=800, height=400)
fig
In [304]:
# Export the 20-minute XP-difference histogram held in `fig` to an HTML file.
fig.write_html("xp_20_hist.html")
In [305]:
# Violin plots (with inner boxes) of the XP difference at 10 minutes,
# one violin per game-length bucket, ordered from shortest to longest games.
fig = px.violin(
    data,
    y='xpdiffat10',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    box=True,
    orientation='v',
    title='Distribution of XP Difference at 10 mins',
)

fig.update_layout(width=700, height=400, yaxis_title='XP Difference at 10 Minutes')

fig.show()
In [306]:
# Export the 10-minute XP-difference violin plot held in `fig` to an HTML file.
fig.write_html("xp_gold_10_violin.html")
In [307]:
# Scatter of XP difference against gold difference at 10 minutes; points are
# coloured by game-length bucket (legend ordered from shortest to longest games).
fig = px.scatter(
    data,
    x='golddiffat10',
    y='xpdiffat10',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    labels=dict(
        golddiffat10='Gold Difference at 10 Minutes',
        xpdiffat10='XP Difference at 10 Minutes',
        time_label='Game Duration Group',
    ),
    title='XP Difference vs. Gold Difference at 10 Minutes',
    width=800,
    height=500,
)

fig.show()
In [308]:
# Export the XP-vs-gold scatter plot held in `fig` to an HTML file.
fig.write_html("xp_gold_10_scatter.html")
In [309]:
# Violin plots (with inner boxes) of the XP difference at 15 minutes, one per game-length bucket.
data.plot(
    kind='violin',
    y='xpdiffat15',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    box=True,
    orientation='v',
    title='Distribution of XP Difference at 15 mins',
)
In [310]:
# Violin plots (with inner boxes) of the XP difference at 20 minutes, one per game-length bucket.
data.plot(
    kind='violin',
    y='xpdiffat20',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    box=True,
    orientation='v',
    title='Distribution of XP Difference at 20 mins',
)
In [311]:
# Violin plots (with inner boxes) of the gold difference at 10 minutes, one per game-length bucket.
data.plot(
    kind='violin',
    y='golddiffat10',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    box=True,
    orientation='v',
    title='Distribution of Gold Difference at 10 mins',
)
In [312]:
# Violin plots (with inner boxes) of the gold difference at 15 minutes, one per game-length bucket.
data.plot(
    kind='violin',
    y='golddiffat15',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    box=True,
    orientation='v',
    title='Distribution of Gold Difference at 15 mins',
)
In [313]:
# Violin plots (with inner boxes) of the gold difference at 20 minutes, one per game-length bucket.
data.plot(
    kind='violin',
    y='golddiffat20',
    color='time_label',
    category_orders=dict(
        time_label=['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)'],
    ),
    box=True,
    orientation='v',
    title='Distribution of Gold Difference at 20 mins',
)

Step 3: Framing a Prediction Problem¶

Predict whether a team wins or loses a match based on its in-game performance features collected by the 20-minute mark¶

Step 4: Baseline Model¶

In [314]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import make_column_transformer

# Baseline inputs: two categorical columns and one numeric column; target is `win`.
X = data[[
    'side',
    'firstbaron',
    'xpdiffat10',
]]
y = data['win']

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
)

def baseline_model(X_train, y_train):
    """Fit the baseline logistic-regression pipeline and return it.

    'side' and 'firstbaron' are one-hot encoded (first level dropped, unknown
    levels ignored) and 'xpdiffat10' is standardised before being passed to a
    default LogisticRegression.

    Parameters
    ----------
    X_train : table containing the columns 'side', 'firstbaron' and 'xpdiffat10'.
    y_train : training labels.

    Returns
    -------
    The fitted sklearn Pipeline.
    """
    categorical = ['side', 'firstbaron']
    numeric = ['xpdiffat10']

    encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
    features = make_column_transformer(
        (encoder, categorical),
        (StandardScaler(), numeric),
    )

    # Pipeline.fit returns the pipeline itself, so it can be returned directly.
    return make_pipeline(features, LogisticRegression()).fit(X_train, y_train)

# Fit the baseline on the training split; the bare name displays the fitted pipeline.
base = baseline_model(X_train, y_train)
base
Out[314]:
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(drop='first',
                                                                handle_unknown='ignore'),
                                                  ['side', 'firstbaron']),
                                                 ('standardscaler',
                                                  StandardScaler(),
                                                  ['xpdiffat10'])])),
                ('logisticregression', LogisticRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(drop='first',
                                                                handle_unknown='ignore'),
                                                  ['side', 'firstbaron']),
                                                 ('standardscaler',
                                                  StandardScaler(),
                                                  ['xpdiffat10'])])),
                ('logisticregression', LogisticRegression())])
ColumnTransformer(transformers=[('onehotencoder',
                                 OneHotEncoder(drop='first',
                                               handle_unknown='ignore'),
                                 ['side', 'firstbaron']),
                                ('standardscaler', StandardScaler(),
                                 ['xpdiffat10'])])
['side', 'firstbaron']
OneHotEncoder(drop='first', handle_unknown='ignore')
['xpdiffat10']
StandardScaler()
LogisticRegression()

Step 5: Final Model¶

Logistic Regression¶

In [315]:
import numpy as np
from sklearn.preprocessing import FunctionTransformer

def compute_per_min(X):
    """Average per-minute rate across the 10-, 15- and 20-minute snapshots.

    X is indexed positionally with `.iloc`, so it is expected to be a pandas
    DataFrame whose first three columns hold the values at 10, 15 and 20
    minutes, in that order. Returns an (n_rows, 1) NumPy array.
    """
    at_10, at_15, at_20 = (X.iloc[:, pos] for pos in range(3))
    mean_rate = (at_10 / 10 + at_15 / 15 + at_20 / 20) / 3
    return mean_rate.to_numpy().reshape(-1, 1)

def compute_tower_score(X):
    """Row-wise sum of the given columns, returned as an (n_rows, 1) NumPy array.

    X is expected to be a pandas DataFrame (the result of `sum` is converted
    with `.to_numpy()`).
    """
    row_totals = X.sum(axis=1)
    return row_totals.to_numpy().reshape(-1, 1)

def compute_diff_drop(X):
    """First column minus second column, as an (n_rows, 1) NumPy array.

    X is indexed positionally with `.iloc`, so it is expected to be a pandas
    DataFrame with at least two columns.
    """
    first, second = X.iloc[:, 0], X.iloc[:, 1]
    drop = first - second
    return drop.to_numpy().reshape(-1, 1)

# Inputs for the final models: side, the "first ..." flags and the XP/gold
# differentials at 10, 15 and 20 minutes; the target is `win`.
# NOTE(review): the per-side summary in 2.7 shows exactly mirrored Blue/Red
# differentials, so each game appears to contribute two mirrored rows; a
# row-level random split may place them on opposite sides of the split.
# Consider splitting by game instead. TODO confirm.
# NOTE(review): 'firstbaron' may not be decided by the 20-minute mark stated in
# Step 3. Confirm it is a legitimate feature for that framing.
X = data[[
    'side',
    'firstbaron', 'firsttothreetowers', 'firstmidtower', 'firsttower',
    'firstdragon', 'firstblood',
    'xpdiffat10', 'xpdiffat15', 'xpdiffat20',
    'golddiffat10', 'golddiffat15', 'golddiffat20',
]]
y = data['win']

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
)

def final_model_1(X_train, y_train):
    """Fit the feature-engineered logistic-regression pipeline and return it.

    Features fed to a default LogisticRegression:
      * one-hot encoded 'side', 'firstbaron', 'firstdragon', 'firstblood'
        (first level dropped);
      * standardised `compute_per_min` of the XP and of the gold differentials;
      * standardised `compute_tower_score` of the three tower flags;
      * standardised `compute_diff_drop` for the 10-vs-15 and 15-vs-20 minute
        pairs of gold and of XP differentials.

    Parameters
    ----------
    X_train : table containing every column named above.
    y_train : training labels.

    Returns
    -------
    The fitted sklearn Pipeline.
    """
    def scaled(func):
        # A fresh "derive one column with `func`, then standardise" sub-pipeline.
        return make_pipeline(FunctionTransformer(func=func), StandardScaler())

    preprocessor = make_column_transformer(
        (OneHotEncoder(drop='first'), ['side', 'firstbaron', 'firstdragon', 'firstblood']),
        (scaled(compute_per_min), ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
        (scaled(compute_per_min), ['golddiffat10', 'golddiffat15', 'golddiffat20']),
        (scaled(compute_tower_score), ['firsttower', 'firstmidtower', 'firsttothreetowers']),
        (scaled(compute_diff_drop), ['golddiffat10', 'golddiffat15']),
        (scaled(compute_diff_drop), ['golddiffat15', 'golddiffat20']),
        (scaled(compute_diff_drop), ['xpdiffat10', 'xpdiffat15']),
        (scaled(compute_diff_drop), ['xpdiffat15', 'xpdiffat20']),
    )

    # Pipeline.fit returns the pipeline itself, so it can be returned directly.
    return make_pipeline(preprocessor, LogisticRegression()).fit(X_train, y_train)

# Fit the logistic-regression final model on the training split; the bare name displays it.
final1 = final_model_1(X_train, y_train)
final1
Out[315]:
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(drop='first'),
                                                  ['side', 'firstbaron',
                                                   'firstdragon',
                                                   'firstblood']),
                                                 ('pipeline-1',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15',
                                                   'x...
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15']),
                                                 ('pipeline-7',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat15',
                                                   'xpdiffat20'])])),
                ('logisticregression', LogisticRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(drop='first'),
                                                  ['side', 'firstbaron',
                                                   'firstdragon',
                                                   'firstblood']),
                                                 ('pipeline-1',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15',
                                                   'x...
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15']),
                                                 ('pipeline-7',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat15',
                                                   'xpdiffat20'])])),
                ('logisticregression', LogisticRegression())])
ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(drop='first'),
                                 ['side', 'firstbaron', 'firstdragon',
                                  'firstblood']),
                                ('pipeline-1',
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
                                ('pipeline-2',
                                 Pipeline(s...
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat10', 'xpdiffat15']),
                                ('pipeline-7',
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat15', 'xpdiffat20'])])
['side', 'firstbaron', 'firstdragon', 'firstblood']
OneHotEncoder(drop='first')
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_per_min at 0x168741e10>)
StandardScaler()
['golddiffat10', 'golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_per_min at 0x168741e10>)
StandardScaler()
['firsttower', 'firstmidtower', 'firsttothreetowers']
FunctionTransformer(func=<function compute_tower_score at 0x168741fc0>)
StandardScaler()
['golddiffat10', 'golddiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['xpdiffat10', 'xpdiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
LogisticRegression()

Random Forest¶

In [316]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
def final_model_2(X_train, y_train, k=10):
    """Grid-search a random-forest pipeline over tree depth and return the search.

    The preprocessing is: one-hot encoded 'side', 'firstbaron', 'firstdragon',
    'firstblood' (first level dropped), plus standardised `compute_per_min`,
    `compute_tower_score` and `compute_diff_drop` features built from the
    XP/gold differentials and tower flags. The classifier is a
    RandomForestClassifier with random_state=123.

    Parameters
    ----------
    X_train : table containing the columns used by the preprocessing.
    y_train : training labels.
    k : number of cross-validation folds passed to GridSearchCV (default 10).

    Returns
    -------
    The fitted GridSearchCV, which tries max_depth 1 through 10 and scores
    candidates by ROC AUC.
    """
    def scaled(func):
        # A fresh "derive one column with `func`, then standardise" sub-pipeline.
        return make_pipeline(FunctionTransformer(func=func), StandardScaler())

    preprocessor = make_column_transformer(
        (OneHotEncoder(drop='first'), ['side', 'firstbaron', 'firstdragon', 'firstblood']),
        (scaled(compute_per_min), ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
        (scaled(compute_per_min), ['golddiffat10', 'golddiffat15', 'golddiffat20']),
        (scaled(compute_tower_score), ['firsttower', 'firstmidtower', 'firsttothreetowers']),
        (scaled(compute_diff_drop), ['golddiffat10', 'golddiffat15']),
        (scaled(compute_diff_drop), ['golddiffat15', 'golddiffat20']),
        (scaled(compute_diff_drop), ['xpdiffat10', 'xpdiffat15']),
        (scaled(compute_diff_drop), ['xpdiffat15', 'xpdiffat20']),
    )

    forest_pipe = make_pipeline(preprocessor, RandomForestClassifier(random_state=123))

    # Only the tree depth is tuned.
    depth_grid = {'randomforestclassifier__max_depth': np.arange(1, 11)}

    search = GridSearchCV(forest_pipe, depth_grid, cv=k, scoring='roc_auc')
    # GridSearchCV.fit returns the search object itself.
    return search.fit(X_train, y_train)

# Run the random-forest grid search with 5 folds (overriding the default k=10);
# the bare name displays the fitted search object.
final2 = final_model_2(X_train, y_train, 5)
final2
Out[316]:
GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('onehotencoder',
                                                                         OneHotEncoder(drop='first'),
                                                                         ['side',
                                                                          'firstbaron',
                                                                          'firstdragon',
                                                                          'firstblood']),
                                                                        ('pipeline-1',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         [...
                                                                        ('pipeline-7',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         ['xpdiffat15',
                                                                          'xpdiffat20'])])),
                                       ('randomforestclassifier',
                                        RandomForestClassifier(random_state=123))]),
             param_grid={'randomforestclassifier__max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])},
             scoring='roc_auc')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('onehotencoder',
                                                                         OneHotEncoder(drop='first'),
                                                                         ['side',
                                                                          'firstbaron',
                                                                          'firstdragon',
                                                                          'firstblood']),
                                                                        ('pipeline-1',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         [...
                                                                        ('pipeline-7',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         ['xpdiffat15',
                                                                          'xpdiffat20'])])),
                                       ('randomforestclassifier',
                                        RandomForestClassifier(random_state=123))]),
             param_grid={'randomforestclassifier__max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])},
             scoring='roc_auc')
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(drop='first'),
                                                  ['side', 'firstbaron',
                                                   'firstdragon',
                                                   'firstblood']),
                                                 ('pipeline-1',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15',
                                                   'x...
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15']),
                                                 ('pipeline-7',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat15',
                                                   'xpdiffat20'])])),
                ('randomforestclassifier',
                 RandomForestClassifier(max_depth=6, random_state=123))])
ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(drop='first'),
                                 ['side', 'firstbaron', 'firstdragon',
                                  'firstblood']),
                                ('pipeline-1',
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
                                ('pipeline-2',
                                 Pipeline(s...
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat10', 'xpdiffat15']),
                                ('pipeline-7',
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat15', 'xpdiffat20'])])
['side', 'firstbaron', 'firstdragon', 'firstblood']
OneHotEncoder(drop='first')
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_per_min at 0x168741e10>)
StandardScaler()
['golddiffat10', 'golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_per_min at 0x168741e10>)
StandardScaler()
['firsttower', 'firstmidtower', 'firsttothreetowers']
FunctionTransformer(func=<function compute_tower_score at 0x168741fc0>)
StandardScaler()
['golddiffat10', 'golddiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['xpdiffat10', 'xpdiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
RandomForestClassifier(max_depth=6, random_state=123)

Decision Tree¶

In [317]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.model_selection import GridSearchCV

def final_model_3(X_train, y_train):
    """Tune a decision-tree pipeline over tree depth with 5-fold cross-validation.

    The pipeline one-hot encodes the categorical indicators, derives the
    engineered features through the notebook's ``compute_per_min``,
    ``compute_tower_score`` and ``compute_diff_drop`` functions (each wrapped
    in a ``FunctionTransformer`` and followed by a ``StandardScaler``), and
    feeds the result to a ``DecisionTreeClassifier``.

    Parameters
    ----------
    X_train : training features; must contain every column named below.
    y_train : training labels.

    Returns
    -------
    GridSearchCV
        The fitted search over ``max_depth`` in 1..10, scored by ROC AUC.
    """
    # (feature function, input columns) for each engineered feature group.
    # The order is significant: make_column_transformer auto-names these
    # 'pipeline-1' ... 'pipeline-7' by position.
    engineered_specs = [
        (compute_per_min, ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
        (compute_per_min, ['golddiffat10', 'golddiffat15', 'golddiffat20']),
        (compute_tower_score, ['firsttower', 'firstmidtower', 'firsttothreetowers']),
        (compute_diff_drop, ['golddiffat10', 'golddiffat15']),
        (compute_diff_drop, ['golddiffat15', 'golddiffat20']),
        (compute_diff_drop, ['xpdiffat10', 'xpdiffat15']),
        (compute_diff_drop, ['xpdiffat15', 'xpdiffat20']),
    ]

    # Each spec gets its own fresh transform-then-scale pipeline instance.
    engineered_steps = [
        (make_pipeline(FunctionTransformer(func=feature_func), StandardScaler()), columns)
        for feature_func, columns in engineered_specs
    ]

    preprocessor = make_column_transformer(
        (OneHotEncoder(drop='first'), ['side', 'firstbaron', 'firstdragon', 'firstblood']),
        *engineered_steps,
    )

    tree_pipeline = make_pipeline(
        preprocessor,
        DecisionTreeClassifier(random_state=123),
    )

    depth_grid = {'decisiontreeclassifier__max_depth': np.arange(1, 11)}

    search = GridSearchCV(tree_pipeline, depth_grid, cv=5, scoring='roc_auc')
    search.fit(X_train, y_train)
    return search

# Fit the depth grid search on the training split; the bare name on the last
# line displays the fitted GridSearchCV object as the cell output.
final3 = final_model_3(X_train, y_train)
final3
Out[317]:
GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('onehotencoder',
                                                                         OneHotEncoder(drop='first'),
                                                                         ['side',
                                                                          'firstbaron',
                                                                          'firstdragon',
                                                                          'firstblood']),
                                                                        ('pipeline-1',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         [...
                                                                        ('pipeline-7',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         ['xpdiffat15',
                                                                          'xpdiffat20'])])),
                                       ('decisiontreeclassifier',
                                        DecisionTreeClassifier(random_state=123))]),
             param_grid={'decisiontreeclassifier__max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])},
             scoring='roc_auc')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('onehotencoder',
                                                                         OneHotEncoder(drop='first'),
                                                                         ['side',
                                                                          'firstbaron',
                                                                          'firstdragon',
                                                                          'firstblood']),
                                                                        ('pipeline-1',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         [...
                                                                        ('pipeline-7',
                                                                         Pipeline(steps=[('functiontransformer',
                                                                                          FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         ['xpdiffat15',
                                                                          'xpdiffat20'])])),
                                       ('decisiontreeclassifier',
                                        DecisionTreeClassifier(random_state=123))]),
             param_grid={'decisiontreeclassifier__max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])},
             scoring='roc_auc')
Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(drop='first'),
                                                  ['side', 'firstbaron',
                                                   'firstdragon',
                                                   'firstblood']),
                                                 ('pipeline-1',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15',
                                                   'x...
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat10', 'xpdiffat15']),
                                                 ('pipeline-7',
                                                  Pipeline(steps=[('functiontransformer',
                                                                   FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['xpdiffat15',
                                                   'xpdiffat20'])])),
                ('decisiontreeclassifier',
                 DecisionTreeClassifier(max_depth=5, random_state=123))])
ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(drop='first'),
                                 ['side', 'firstbaron', 'firstdragon',
                                  'firstblood']),
                                ('pipeline-1',
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_per_min at 0x168741e10>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
                                ('pipeline-2',
                                 Pipeline(s...
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat10', 'xpdiffat15']),
                                ('pipeline-7',
                                 Pipeline(steps=[('functiontransformer',
                                                  FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['xpdiffat15', 'xpdiffat20'])])
['side', 'firstbaron', 'firstdragon', 'firstblood']
OneHotEncoder(drop='first')
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_per_min at 0x168741e10>)
StandardScaler()
['golddiffat10', 'golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_per_min at 0x168741e10>)
StandardScaler()
['firsttower', 'firstmidtower', 'firsttothreetowers']
FunctionTransformer(func=<function compute_tower_score at 0x168741fc0>)
StandardScaler()
['golddiffat10', 'golddiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['xpdiffat10', 'xpdiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
['xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x1687436d0>)
StandardScaler()
DecisionTreeClassifier(max_depth=5, random_state=123)

ROC curves¶

In [318]:
def draw_roc_curves(models, X_test, y_test):
    """Plot one ROC curve per model on a shared figure, save it, and show it.

    Parameters
    ----------
    models : dict
        Maps a display label to a fitted estimator exposing ``predict_proba``.
    X_test : features passed unchanged to each model's ``predict_proba``.
    y_test : true labels; must provide ``.to_numpy()`` (e.g. a pandas Series).

    Side effects
    ------------
    Writes the figure to ``roc_curves.html`` in the working directory and
    displays it inline.
    """
    # Convert once; the ground truth is the same for every model.
    y_true = y_test.to_numpy()
    all_roc_data = []

    for label, model in models.items():
        # Column 1 of predict_proba is the score for the positive class.
        probs = model.predict_proba(X_test)[:, 1]
        fprs, tprs, _ = roc_curve(y_true, probs)
        roc_auc = auc(fprs, tprs)
        legend_label = f'{label} (AUC = {roc_auc:.2f})'

        all_roc_data.extend(
            {'FPR': fpr, 'TPR': tpr, 'Model': legend_label}
            for fpr, tpr in zip(fprs, tprs)
        )

    df_roc = pd.DataFrame(all_roc_data)

    fig = px.line(
        df_roc,
        x='FPR',
        y='TPR',
        color='Model',
        title='ROC Curves for Multiple Models',
        labels={'FPR': 'False Positive Rate', 'TPR': 'True Positive Rate'},
        width=1000,
        height=600
    )

    # Finish all layout changes BEFORE exporting, so the saved HTML matches
    # the figure shown in the notebook (legend title included).
    fig.update_layout(legend_title='Model (AUC)')
    fig.write_html("roc_curves.html")
    fig.show()

# Legend label -> fitted model, for the ROC comparison on the test split.
models = {
    'Logistic Regression (base)': base,
    'Logistic Regression (final1)': final1,
    'Random Forest (final2)': final2,
    'Decision Tree (final3)': final3,
}

# Draws every model's ROC curve on one figure (also saved as roc_curves.html).
draw_roc_curves(models, X_test, y_test)

Confusion Matrices¶

In [319]:
def predict_thresholded(model, X_test, T):
    """Return 0/1 predictions using decision threshold ``T``.

    A row is predicted 1 when the second column of
    ``model.predict_proba(X_test)`` is greater than or equal to ``T``,
    and 0 otherwise.
    """
    positive_scores = model.predict_proba(X_test)[:, 1]
    meets_threshold = positive_scores >= T
    return meets_threshold.astype(int)

def get_confusion_heatmap(model, X_test, y_test, T, title):
    """Build a confusion-matrix heatmap trace for ``model`` at threshold ``T``.

    Parameters
    ----------
    model : fitted estimator exposing ``predict_proba``.
    X_test : features to predict on.
    y_test : true labels, coded 0/1 like the thresholded predictions.
    T : decision threshold handed to ``predict_thresholded``.
    title : name assigned to the heatmap trace.

    Returns
    -------
    tuple
        ``(go.Heatmap, accuracy)`` where accuracy is computed from the same
        thresholded predictions as the matrix.
    """
    y_pred = predict_thresholded(model, X_test, T)
    # Pin the label order so the matrix is always 2x2 and lines up with the
    # hard-coded axis/cell labels below, even if only one class shows up in
    # both y_test and y_pred (which would otherwise yield a 1x1 matrix).
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    acc = accuracy_score(y_test, y_pred)

    heatmap = go.Heatmap(
        z=cm,
        x=['Predicted Negative', 'Predicted Positive'],
        y=['Actual Negative', 'Actual Positive'],
        colorscale='Blues',
        # Cell captions follow sklearn's layout: rows = actual, cols = predicted.
        text=[['True Negatives (TN)', 'False Positives (FP)'],
              ['False Negatives (FN)', 'True Positives (TP)']],
        texttemplate='%{text}<br>%{z}',
        textfont=dict(size=11),
        hovertemplate='Count: %{z}<br>Category: %{text}',
        showscale=False,
        name=title
    )
    return heatmap, acc

def show_multiple_confusions(models, X_test, y_test, T=0.5):
    """Show confusion-matrix heatmaps for several models in a two-column grid.

    Parameters
    ----------
    models : dict
        Maps a display name to a fitted estimator exposing ``predict_proba``.
    X_test : features to predict on.
    y_test : true labels.
    T : float, default 0.5
        Decision threshold applied to every model.

    Side effects
    ------------
    Writes the figure to ``confusion_matrices.html`` in the working directory
    and displays it inline.
    """
    heatmaps = []
    titles = []
    for name, model in models.items():
        heatmap, acc = get_confusion_heatmap(model, X_test, y_test, T, title=name)
        heatmaps.append(heatmap)
        titles.append(f"{name}<br>Accuracy={acc:.3f}")

    # Size the grid from the number of models instead of assuming exactly four
    # (four models still produce the original 2x2 layout at 800x750).
    n_cols = 2
    n_rows = max(1, (len(heatmaps) + n_cols - 1) // n_cols)

    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=titles,
        horizontal_spacing=0.2,
        # Cap the spacing so it stays within plotly's limit as rows increase.
        vertical_spacing=min(0.12, 1 / n_rows)
    )

    # Fill the grid row by row, left to right.
    for i, heatmap in enumerate(heatmaps):
        fig.add_trace(heatmap, row=i // n_cols + 1, col=i % n_cols + 1)

    fig.update_layout(
        width=800,
        height=150 + 300 * n_rows,
        title_text=f"Confusion Matrices for Multiple Models (Threshold = {T})",
        title_x=0.5,
        margin=dict(t=100)
    )
    # Flip the y axes so "Actual Negative" is the top row, and do it BEFORE
    # exporting so the saved HTML has the same orientation as the notebook.
    fig.update_yaxes(autorange='reversed')
    fig.write_html("confusion_matrices.html")
    fig.show()

# Subplot title -> fitted model, for the confusion-matrix grid.
# Note: this rebinds `models` from the ROC cell with different display names.
models = {
    'Basic Logistic Regression': base,
    'Final Logistic Regression': final1,
    'Random Forest': final2,
    'Decision Tree': final3
}

# Compare all models at the 0.5 decision threshold on the test split.
show_multiple_confusions(models, X_test, y_test, T=0.5)
In [ ]: